Pupil size light reflex to a light test is a potential test to determine a person is under the influence of THC and be able to be use as a field sobriety measures. The goals of the analysis to:
Pupil size light reflex to light does not habituated to THC use (smoking).
Time for smoking to post test maybe an important variable to model b/c:
There is a circadian pattern to pupil size (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7445830/).
Currently this field does not use FDA, so any FDA in topic area may be publishable
Data on pupil size during light reflex test. Pupil size was extracted at the image level based on image analysis techniques. Each test was performed simultaneously on right and left eye before and after THC use (smoking).
ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo.csv"))
ps$demo_gender <- factor(ps$demo_gender,
levels = c(1,2),
labels = c("Male", "Female"))
ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2
ps$user_cat <- factor(ps$user_cat,
levels = c(0,1,2),
labels = c("non-user",
"occasional",
"daily"))
ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1
ps$tp <- factor(ps$tp,
levels = c(0,1),
labels = c("pre2", "post"))
# str(ps)
#impute frame number
for(i in 2:(nrow(ps)-1)){
if(is.na(ps$frame[i]) & is.na(ps$frame[i+1] - ps$frame[i-1])){
ps$frame[i] <- ps$frame[i-1]+1
}else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) == 2){
ps$frame[i] <- ps$frame[i-1]+1
}else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) > 2){
if(abs(ps$percent_change[i] - ps$percent_change[i-1]) <
abs(ps$percent_change[i+1] - ps$percent_change[i])){
ps$frame[i] <- ps$frame[i-1]+1
}else(ps$frame[i] <- ps$frame[i+1]-1)
}
)
)
}
# total number of subjects
length(unique(ps$subject_id))
## [1] 101
# subject level data
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "thc")])
# Demographics Table by User Category
table1(~ demo_age + demo_weight + demo_height + demo_gender|user_cat,
data = pt.df[pt.df$tp == "pre2", ])
| non-user (N=32) |
occasional (N=36) |
daily (N=31) |
Overall (N=99) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.9 (4.90) | 31.6 (4.98) | 33.1 (5.60) | 32.5 (5.15) |
| Median [Min, Max] | 33.5 [25.7, 42.2] | 30.1 [25.1, 41.9] | 32.3 [25.4, 45.3] | 32.0 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 171 (48.5) | 173 (33.4) | 174 (32.7) | 173 (38.4) |
| Median [Min, Max] | 165 [85.0, 284] | 170 [105, 240] | 170 [113, 250] | 170 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.83) | 69.6 (3.60) | 68.0 (3.54) | 68.6 (4.06) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [59.0, 76.0] | 69.0 [59.0, 78.0] |
| demo_gender | ||||
| Male | 13 (40.6%) | 23 (63.9%) | 17 (54.8%) | 53 (53.5%) |
| Female | 19 (59.4%) | 13 (36.1%) | 14 (45.2%) | 46 (46.5%) |
# number of subjects by timepoint (pre/post)
table(pt.df$tp)
##
## pre2 post
## 99 95
There are more subjects in total than the by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.
I’m looking for any sharp declines over 10 frames after the 150th frame and then visualizing those trajectories to determine if there might be misalignment of the light test start frame.
ps$lagPctChg <- c(rep(0, 10), diff(ps$percent_change, lag = 10))
ps$lagID <- dplyr::lag(ps$subject_id, 10)
ps$lagtime <- dplyr::lag(ps$time, 10)
ps$lagEye <- dplyr::lag(ps$eye, 10)
ps$lagPctChg <- ifelse(ps$subject_id == ps$lagID & ps$time == ps$lagtime & ps$eye == ps$lagEye,
ps$lagPctChg, 0)
summary(ps$lagPctChg[ps$frame > 150])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.8625 -0.1763 0.3744 0.4622 1.1063 10.4196
hist(ps$lagPctChg[ps$frame > 150], breaks = 1000)
ps.150 <- ps[ps$frame > 150 & ps$eye == "Right", ]
pot.misaligned <- unique(ps.150[ps.150$lagPctChg <= -5, c("subject_id", "time", "user_type", "eye")])
pot.misaligned <- pot.misaligned[!(is.na(pot.misaligned$subject_id)), ]
pot.misaligned$misaligned <- 1
misaligned <- merge(ps, pot.misaligned,
by= c("subject_id", "time", "user_type", "eye"),
all.y = T)
mis.right <- misaligned[misaligned$eye == "Right", ]
misAlign.id <- unique(misaligned$subject_id)
for(i in misAlign.id){
print(ggplot(mis.right[mis.right$subject_id == i, ],
aes(x=frame, y = percent_change,
group = subject_id, color = i))+
geom_line()+
facet_grid(rows = vars(subject_id), cols = vars(tp))+
labs(title = paste0("Potential MisAligned Subject: ", i))+
theme_bw())
}
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_109.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-109", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_060.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-060", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
### New alignment view
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_007.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-007", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: start at 2nd bump?")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_033.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-033", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_038.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-038", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_043.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-043", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Both look odd, but mainly check pre; maybe review video")+
# theme_bw()
# dev.off()
Remove Outliers
## Remove known outliers
ps$outliers <- 0
ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1
ps <- ps[ps$outliers == 0, ]
Plot of Pupil Size and Percent Change for “PRE” data by Eye and User Category
pre.df <- ps[ps$tp == "pre2", ]
ggplot(pre.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(pre.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot of PRE/POST for Right Eye
right.df <- ps[ps$eye == "Right", ]
ggplot(right.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title ="Pupil Size by Pre/Post and THC use category")+
theme_bw()
ggplot(right.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title = "Percent Change by Pre/Post and THC use category")+
theme_bw()
Plots of Left and Right Eye for “POST” data. One pt has negative values for pupil size.
post.df <- ps[ps$tp == "post", ]
ggplot(post.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(post.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"),
sheet = "Raw")
testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date, origin = "1899-12-30")
testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$pre_safetyscan_time_hr, ":", testTimes$pre_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_start_time_hr, ":", testTimes$consump_start_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_end_time_hr, ":", testTimes$consump_end_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$post_safetyscan_time_hr, ":", testTimes$post_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$consump_end_time_convert,
units = "mins"))
testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]
pt.df <- merge(pt.df, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
names(pt.df)
## [1] "subject_id" "tp" "user_cat"
## [4] "demo_age" "demo_weight" "demo_height"
## [7] "demo_gender" "thc" "postConsumpTimeToTest"
ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
names(pt.df)
## [1] "subject_id" "tp" "user_cat"
## [4] "demo_age" "demo_weight" "demo_height"
## [7] "demo_gender" "thc" "postConsumpTimeToTest"
Plot the Mean and +/- 1 SD functions for the percent change in pupil light reflex data for the pre and post data by THC user category. (Added code to input frame numbers when NA).
right_0.df <- right.df[right.df$frame >= 0, c("subject_id", "tp", "user_cat",
"frame", "percent_change")]
right_0.w <- reshape(right_0.df,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
rownames(right_0.w) <- paste0(right_0.w$subject_id, "_", right_0.w$tp)
pct_chg <- names(right_0.w[, 4:602])
mean_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
mean_fxn.l <- reshape(mean_fxn,
varying = pct_chg,
v.names = "percent_change",
timevar = "frame",
times = pct_chg,
direction = "long")
mean_fxn.l$frame <- as.numeric(gsub("percent_change.", "", mean_fxn.l$frame))
rownames(mean_fxn.l) <- NULL
mean_fxn.l$id <- NULL
names(mean_fxn.l)[names(mean_fxn.l) == "Group.1"] <- "tp"
names(mean_fxn.l)[names(mean_fxn.l) == "Group.2"] <- "user_cat"
mean_fxn.l$grp <- paste0(mean_fxn.l$tp, "_", mean_fxn.l$user_cat)
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 449 row(s) containing missing values (geom_path).
sd_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
NA’s are when there’s no data in for that time point and user category. Min frame value for NA is 480.
right_0.fpca <- fpca.face(Y = as.matrix(right_0.w[, 4:602]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0.fpca.pve <- round(right_0.fpca$evalues/sum(right_0.fpca$evalues)*100, 2)
mu <- right_0.fpca$mu
right_sd <- sqrt(right_0.fpca$evalues)
right_Phi <- right_0.fpca$efunctions
df_plot <- data.frame(frame = seq(0, 598, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
plot_pc1 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc2 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc3 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc4 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc1
PC1 plot: Individuals with low loading on PC1 (-2SD) have less constriction than the average and individuals with a high loading (+2SD) have more constriction. Rebound effect?
plot_pc2
PC2 plot: Overall shape of trajectory & pattern of recovery
Plot invdividuals with high/low PC2 overall scores.
right_scores <- right_0.fpca$scores
q.95 <- quantile(right_scores[, 2], p = 0.95)
highPC2 <- rownames(right_scores)[right_scores[, 2] > q.95]
highPC2.df <- data.frame(subject_id = substr(highPC2, 1, 7),
tp = substr(highPC2, 9,12))
for(i in 1:nrow(highPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == highPC2.df$subject_id[i] & right_0.df$tp == highPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for high PC2 score --", "Subject:", highPC2.df$subject_id[i], "Timepoint:", highPC2.df$tp[i]))+
theme_bw())
}
q.05 <- quantile(right_scores[, 2], p = 0.05)
lowPC2 <- rownames(right_scores)[right_scores[, 2] < q.05]
lowPC2.df <- data.frame(subject_id = substr(lowPC2, 1, 7),
tp = substr(lowPC2, 9,12))
for(i in 1:nrow(lowPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == lowPC2.df$subject_id[i] & right_0.df$tp == lowPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for low PC2 score --", "Subject:", lowPC2.df$subject_id[i], "Timepoint:", lowPC2.df$tp[i]))+
theme_bw())
}
plot_pc3
plot_pc4
Calculate the difference (post-pre) within subjects and plot by THC user category
right_0.pt <- reshape(right_0.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "frame"),
direction = "wide")
right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2
ggplot(right_0.pt, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat))+
labs(title ="Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 10525 row(s) containing missing values (geom_path).
Non-user plot seems “centered” around 0 but still showing
heterogeneity. Lots of heterogeneity across user-groups, but a mostly
positive difference.
right_0.pt <- right_0.pt[, c("subject_id", "user_cat", "frame", "wiPctChg")]
right_0.pt.w <- reshape(right_0.pt,
timevar = "frame",
idvar = c("subject_id", "user_cat"),
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 3:601]))
rowsAllMissing <- names(allMissing)[allMissing == 599]
right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]
rownames(right_0.pt.w) <- paste0(right_0.pt.w$subject_id, "_", right_0.pt.w$user_cat)
# Check to make sure there are significant numbers in each user category
table(right_0.pt.w$user_cat)
##
## non-user occasional daily
## 29 29 25
mean_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:601],
list(right_0.pt.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:601]
mean_fxn.pt.l <- reshape(mean_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
mean_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", mean_fxn.pt.l$frame))
rownames(mean_fxn.pt.l) <- NULL
mean_fxn.pt.l$id <- NULL
names(mean_fxn.pt.l)[names(mean_fxn.pt.l) == "Group.1"] <- "user_cat"
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 361 row(s) containing missing values (geom_path).
Difference in trajectories (post-pre), show a distinct difference between non-user and both user groups (up to frame 200), but the differences might not be significant. Harder to distinguish between the occasional and daily user trajectories.
# Check to make sure there are significant numbers in each user category
table(right_0.pt.w$user_cat)
##
## non-user occasional daily
## 29 29 25
sd_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:601],
list(right_0.pt.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:601]
sd_fxn.pt.l <- reshape(sd_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
sd_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", sd_fxn.pt.l$frame))
rownames(sd_fxn.pt.l) <- NULL
sd_fxn.pt.l$id <- NULL
names(sd_fxn.pt.l)[names(sd_fxn.pt.l) == "Group.1"] <- "user_cat"
sd_fxn.pt.l$wi_percent_change_neg <- -1*sd_fxn.pt.l$wi_percent_change
ggplot(sd_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
geom_line(aes(x=frame, y = wi_percent_change_neg, group = user_cat, color = user_cat),
linetype = "dashed")+
labs(title ="+/- 1 SD Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 422 row(s) containing missing values (geom_path).
## Removed 422 row(s) containing missing values (geom_path).
## number of missing data points in each column
missingnessCol <- apply(right_0.pt.w[, 3:601],
MARGIN = 2,
FUN = function(x) sum(is.na(x)))
sum(missingnessCol == 0) ## 140 columns without any missing data
## [1] 140
sum(missingnessCol == 84) ## 109 columns with all missing data
## [1] 0
sum(missingnessCol <= 68) ## 438 columns where at least 80% of participants have data
## [1] 440
hist(missingnessCol, breaks = 30)
missing.df <- data.frame(frame = seq(0, 598, by =1),
missingness = round(missingnessCol/nrow(right_0.pt.w)*100, 2),
stringsAsFactors = F)
ggplot(missing.df, aes(x=frame, y= missingness))+
geom_line()
Truncate within person difference data to frame 400 where missingness
reaches 50%.
Try to visualize missing data in within person data frame
wi_pct_chg <- names(right_0.pt.w)[3:601]
right_0.pt.l <- reshape(right_0.pt.w,
varying = wi_pct_chg,
v.names = "wi_pct_chg_diff",
timevar = 'frame',
times = wi_pct_chg,
direction = "long")
right_0.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", right_0.pt.l$frame))
rownames(right_0.pt.l) <- NULL
right_0.pt.l$id <- NULL
right_0.pt.l$notMissing <- ifelse(is.na(right_0.pt.l$wi_pct_chg_diff), 0, 1)
ggplot(right_0.pt.l, aes(x = as.factor(frame), y = subject_id, fill = as.factor(notMissing)))+
geom_raster(alpha = 0.8)+
scale_fill_manual(values = c("tomato3", 'steelblue'),
labels = c("Missing", "Present"))+
theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust = 1))
right_0_wi.fpca <- fpca.face(Y = as.matrix(right_0.pt.w[, 3:401]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0_wi.fpca.pve <- round(right_0_wi.fpca$evalues/sum(right_0_wi.fpca$evalues)*100, 2)
mu <- right_0_wi.fpca$mu
right_sd <- sqrt(right_0_wi.fpca$evalues)
right_Phi <- right_0_wi.fpca$efunctions
wi.df_plot <- data.frame(frame = seq(0, 398, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5],
errband6 = 2*right_Phi[, 6]*right_sd[6])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
wi.plot_pc1 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0_wi.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc2 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0_wi.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc3 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0_wi.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc4 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0_wi.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc5 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0_wi.fpca.pve[5]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc6 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband6, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband6, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC6:", "% Var Explained:", right_0_wi.fpca.pve[6]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc1
PC1: Individuals with high scores on PC1 have a weaker minimal constriction at post compared to pre. Individuals with low scores on PC1 have a stronger minimal constriction at post compared to pre.
wi.scores <- as.data.frame(right_0_wi.fpca$scores)
names(wi.scores) <- c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6")
wi.scores$id <- rownames(wi.scores)
ids <- str_split(wi.scores$id, "_", n=2, simplify = TRUE)
wi.scores$subject_id <- ids[, 1]
wi.scores$user_cat <- ids[, 2]
q.95 <- quantile(wi.scores$PC1, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC1 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC1 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 234 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC1 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC1, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC1 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC1 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 399 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC1 scores"))+
theme_bw())
}
wi.plot_pc2
PC2: Individuals with high loading on PC2 have a weaker initial constriction at post and a stronger rebound dilation past the 200th frame. Individuals with a low loading on PC2 have a stronger initial constriction at paost and a weaker rebound dilation past the 200th frame.
q.95 <- quantile(wi.scores$PC2, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC2 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC2 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 111 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC2 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC2, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC2 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC2 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 264 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC2 scores"))+
theme_bw())
}
wi.plot_pc3
wi.plot_pc4
wi.plot_pc5
wi.plot_pc6
### Within person difference PC explanations
### INVESTIGATE BUMPS IN MEAN Function
## Pre/Post Across subjects
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
xlim(50, 200)+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 2688 row(s) containing missing values (geom_path).
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "pre2", 103:105]
## percent_change.100 percent_change.101 percent_change.102
## 001-003_pre2 -39.841378 NA -40.056700
## 001-004_pre2 -20.509286 -20.636732 -20.765188
## 001-007_pre2 -29.728903 -30.372171 -30.985599
## 001-009_pre2 -12.595393 -12.384864 -12.173821
## 001-015_pre2 -27.419396 -27.277259 -27.112306
## 001-029_pre2 -22.422026 -22.324030 -22.238468
## 001-030_pre2 -19.904482 -19.730264 -19.562660
## 001-031_pre2 -26.659914 -26.707617 -26.723738
## 001-032_pre2 -25.607501 -25.790114 -25.972873
## 001-037_pre2 -10.766774 -10.595359 -10.422002
## 001-038_pre2 -14.582329 -14.220651 -13.880713
## 001-039_pre2 -8.698690 -8.442030 -8.215627
## 001-040_pre2 -13.961272 -13.965766 -13.969542
## 001-041_pre2 -22.801896 -23.144297 -23.480081
## 001-042_pre2 -22.334268 -22.256735 -22.200964
## 001-043_pre2 -11.625394 -11.267720 -10.929376
## 001-045_pre2 -24.484764 -23.910475 -23.359554
## 001-046_pre2 -39.228644 -39.117800 -39.005777
## 001-047_pre2 -8.537555 -8.481596 -8.428778
## 001-049_pre2 -20.116576 -19.984262 -19.863703
## 001-050_pre2 -29.870190 -29.993167 -30.145066
## 001-053_pre2 -31.777050 -31.811150 -31.839822
## 001-054_pre2 -19.003078 -18.907631 -18.830007
## 001-055_pre2 -22.701872 -23.326338 -23.938734
## 001-062_pre2 -5.394998 -5.303947 -5.217166
## 001-076_pre2 -43.094073 -42.798501 -42.471884
## 001-077_pre2 -9.357431 -9.313066 -9.234195
## 001-097_pre2 -9.338060 -9.588896 -9.851689
## 001-099_pre2 -32.517179 -32.697862 -32.884544
## 001-095_pre2 -18.898660 -18.708524 -18.520319
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "post", 101:103]
## percent_change.98 percent_change.99 percent_change.100
## 001-003_post -27.753056 -27.913264 -28.065082
## 001-005_post -21.013709 -20.832539 -20.645492
## 001-007_post -18.738419 -18.445158 -18.149873
## 001-009_post -15.268272 -15.431165 -15.637139
## 001-015_post -23.671938 -23.558792 -23.444028
## 001-029_post -15.334405 -15.258121 -15.180255
## 001-031_post -32.884257 -32.661430 -32.418305
## 001-032_post -17.526118 -17.369644 -17.229585
## 001-037_post -7.295840 -7.301405 -7.306222
## 001-038_post -14.328061 -14.119133 -13.920714
## 001-039_post -6.300429 -6.357287 -6.401905
## 001-040_post -9.856376 -9.737562 -9.617909
## 001-041_post -11.783820 -11.723059 -11.672176
## 001-042_post -23.864704 -23.870622 -23.857456
## 001-043_post -9.449283 -9.492914 -9.533265
## 001-045_post -46.643347 -46.578226 -46.528424
## 001-046_post -24.943494 -24.785338 -24.634926
## 001-047_post -5.657225 -5.615010 -5.597963
## 001-049_post -12.144504 -12.199826 -12.238348
## 001-050_post -32.971581 -33.069765 -33.122365
## 001-053_post -18.045911 -17.931763 -17.810962
## 001-054_post -27.036703 -27.191479 -27.350672
## 001-055_post -12.115957 -11.958694 -11.809640
## 001-062_post -8.366234 -8.281283 -8.206952
## 001-076_post -51.349015 NA -51.523083
## 001-077_post -3.120474 -3.135075 -3.153623
## 001-097_post -14.253318 -13.977159 -13.698777
## 001-099_post -15.639890 -15.662828 -15.693056
## 001-004_post -5.257832 -5.198141 -5.149285
## 001-030_post -12.088334 -12.022693 -11.954151
## Within person plots
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
xlim(50, 200)+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 1344 row(s) containing missing values (geom_path).
## Mean function Spike in Occassional user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "occasional", 113:119]
## wiPctChg.112 wiPctChg.113 wiPctChg.114 wiPctChg.115 wiPctChg.116 wiPctChg.117
## 2 10.21026 10.19975 10.18894 9.336816 9.651207 10.6294
## wiPctChg.118
## 2 10.15556
right_0.pt.w[right_0.pt.w$user_cat == "occasional", 116:119]
## wiPctChg.114 wiPctChg.115 wiPctChg.116 wiPctChg.117
## 001-006_occasional 11.5010736 11.5256630 11.5441500 11.5567771
## 001-011_occasional 13.8610423 13.4068052 12.9843136 12.5971631
## 001-014_occasional 21.3107326 21.4515436 21.6131807 21.7968208
## 001-021_occasional -1.1912416 -0.9231421 -0.6790802 -0.4625531
## 001-026_occasional 9.7779962 9.7134617 9.6452439 9.5739134
## 001-033_occasional -4.5904810 -4.5847367 -4.5874266 -4.5993762
## 001-066_occasional -2.9926603 -2.6954025 -2.4078270 -2.1356420
## 001-068_occasional -2.3234722 -2.5620425 -2.8047079 NA
## 001-069_occasional 10.3751083 10.3261663 10.2674197 10.2007111
## 001-070_occasional -6.3680231 -6.6819575 -6.9642840 -7.2115993
## 001-073_occasional 1.0988326 0.8742189 0.6410650 0.3974723
## 001-079_occasional 1.5458615 1.5596127 1.5732513 1.5872532
## 001-098_occasional 13.7185797 13.7309383 13.7531044 13.7858458
## 001-103_occasional 13.3247626 13.1186223 12.9151765 12.7150573
## 001-104_occasional 12.7854841 13.1440297 13.4777229 13.7848223
## 001-105_occasional 5.5596362 5.0371812 4.5128967 3.9884444
## 001-106_occasional 31.6685053 31.4176485 31.1680513 30.9234781
## 001-107_occasional 13.0855226 13.1838102 13.2833605 13.3842042
## 001-108_occasional 0.6176109 0.8447543 1.0849742 1.3358006
## 001-110_occasional 20.8014945 20.6608085 20.5292836 20.4062968
## 001-111_occasional 24.6366830 24.5955681 NA 24.5397456
## 001-112_occasional 5.4392987 5.2952310 5.1482113 5.0007441
## 001-113_occasional 4.3581125 4.6115089 4.8917672 5.1963956
## 001-114_occasional 8.3133753 NA 9.0936072 9.6649670
## 001-116_occasional 9.5616376 10.2337297 10.9367739 11.6657409
## 001-117_occasional 26.0633785 25.9994142 25.9175125 25.8186124
## 001-118_occasional 22.3015542 22.0631247 21.8292447 21.6023908
## 001-061_occasional -3.1586454 -3.2525226 -3.3458427 -3.4373346
## 001-074_occasional 34.3975966 NA 34.2126412 33.9469985
## Mean function Spike in Daily user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "daily", 133:149]
## wiPctChg.132 wiPctChg.133 wiPctChg.134 wiPctChg.135 wiPctChg.136 wiPctChg.137
## 3 9.277331 9.745381 9.979741 9.761392 9.535792 10.07879
## wiPctChg.138 wiPctChg.139 wiPctChg.140 wiPctChg.141 wiPctChg.142 wiPctChg.143
## 3 10.10521 10.12753 9.170105 9.967127 10.84453 12.05176
## wiPctChg.144 wiPctChg.145 wiPctChg.146 wiPctChg.147 wiPctChg.148
## 3 9.733335 10.18234 10.17635 10.1437 10.15056
right_0.pt.w[right_0.pt.w$user_cat == "daily", 143:146]
## wiPctChg.141 wiPctChg.142 wiPctChg.143 wiPctChg.144
## 001-002_daily NA 14.7153791 14.601849 14.495408
## 001-008_daily 14.7396420 14.7359309 14.729108 14.719145
## 001-010_daily 11.4783127 11.7425889 12.013432 12.288979
## 001-012_daily 33.4153783 33.3457661 33.274294 33.200908
## 001-018_daily 0.7302098 0.8743156 1.019096 1.163107
## 001-022_daily 19.1083615 19.0395858 18.998221 18.982808
## 001-024_daily 3.7780247 3.7627633 NA 3.822039
## 001-027_daily -1.7850082 -1.7402579 -1.684847 -1.620898
## 001-044_daily 10.2479799 10.1786281 10.105654 10.028880
## 001-056_daily 3.6104524 3.6402340 3.666943 3.690611
## 001-063_daily 20.4580365 20.7316590 20.963443 NA
## 001-064_daily 2.7132570 NA 3.009631 3.197956
## 001-067_daily 11.1802980 11.1078556 11.034733 10.960500
## 001-075_daily 15.0383830 15.2525116 15.432585 15.575752
## 001-081_daily 20.1667975 19.9577148 19.758334 19.568175
## 001-083_daily 6.5437739 6.7744270 NA 7.279460
## 001-085_daily 2.4370639 2.4914371 NA 2.661117
## 001-086_daily 15.4687276 15.3217226 15.147036 14.945924
## 001-088_daily 18.6395264 18.5982227 18.530299 18.439520
## 001-090_daily 12.8303747 13.4777605 14.106619 14.712315
## 001-091_daily 2.7300374 2.3344146 1.952296 1.587665
## 001-093_daily 10.2499191 10.0865217 9.897907 9.687709
## 001-094_daily -11.5874056 -11.7885342 NA -12.022410
## 001-096_daily 15.0428194 14.7835467 14.512852 14.232459
## 001-013_daily 1.9760770 NA 2.017467 2.002911
Jagged changes in the mean function lines seems to stem from missing data at those frames in the data set. The subjects per user-group is between 25 - 30, so missing data for one individual has a noticeable effect on mean function line.
post_right_0.w <- right_0.w[right_0.w$tp == "post", ]
post_right_0.fpca <- fpca.face(Y = as.matrix(post_right_0.w[, 4:602]), pve = 0.95, knots = 30, var = T)
post_right_0.fpca.pve <- round(post_right_0.fpca$evalues/sum(post_right_0.fpca$evalues)*100, 2)
post_right_scores <- as.data.frame(post_right_0.fpca$scores)
names(post_right_scores) <- c("PC1", "PC2", "PC3", "PC4")
post_right_scores$id <- rownames(post_right_scores)
post_right_scores$subject_id <- substr(post_right_scores$id, 1, 7)
post_right_scores$tp <- substr(post_right_scores$id, 9, 12)
pt.scores <- merge(post_right_scores, pt.df,
by = c("subject_id", "tp"),
all.x = T)
pt.scores$smoker <- ifelse(pt.scores$user_cat %in% c("daily", "occasional"),1, 0)
m1 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.scores)
summary(m1)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8874 -1.0671 0.6905 0.8993 1.3757
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7283989 0.2429538 2.998 0.00272 **
## PC1 -0.0007067 0.0015898 -0.445 0.65667
## PC2 -0.0077840 0.0050885 -1.530 0.12608
## PC3 0.0179608 0.0078338 2.293 0.02186 *
## PC4 0.0080786 0.0091308 0.885 0.37628
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 112.93 on 87 degrees of freedom
## Residual deviance: 103.23 on 83 degrees of freedom
## AIC: 113.23
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred <- predict(m1, pt.scores)
pt.scores.roc <- roc(response = pt.scores$smoker, predictor = pt.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc
##
## Call:
## roc.default(response = pt.scores$smoker, predictor = pt.scores$pred)
##
## Data: pt.scores$pred in 30 controls (pt.scores$smoker 0) < 58 cases (pt.scores$smoker 1).
## Area under the curve: 0.6667
plot.roc(pt.scores.roc)
wi.scores$user_cat <- NULL
pt.wi.scores <- merge(wi.scores, pt.df[pt.df$tp == "post", ],
by = "subject_id",
all.x = T)
pt.wi.scores$smoker <- ifelse(pt.wi.scores$user_cat %in% c("daily", "occasional"),1, 0)
m2 <- glm(smoker ~ PC1 + PC2 + PC3 + PC4+ PC5+ PC6, family = "binomial", data = pt.wi.scores)
summary(m2)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0000 -0.8940 0.4882 0.9015 1.5602
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.809193 0.276593 2.926 0.00344 **
## PC1 0.004114 0.001955 2.105 0.03533 *
## PC2 0.006158 0.003991 1.543 0.12278
## PC3 -0.013195 0.005702 -2.314 0.02067 *
## PC4 -0.008717 0.007334 -1.189 0.23460
## PC5 -0.014933 0.008988 -1.662 0.09661 .
## PC6 0.007149 0.010622 0.673 0.50094
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.414 on 82 degrees of freedom
## Residual deviance: 89.431 on 76 degrees of freedom
## AIC: 103.43
##
## Number of Fisher Scoring iterations: 5
pt.wi.scores$pred <- predict(m2, pt.wi.scores)
pt.wi.scores.roc <- roc(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred)
##
## Data: pt.wi.scores$pred in 29 controls (pt.wi.scores$smoker 0) < 54 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.7586
plot.roc(pt.wi.scores.roc)
sample <- intersect(pt.scores$subject_id, pt.wi.scores$subject_id)
m3 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.scores[pt.scores$subject_id %in% sample, ])
summary(m3)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores[pt.scores$subject_id %in% sample, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8464 -1.1162 0.7245 0.9123 1.3931
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.675796 0.246002 2.747 0.00601 **
## PC1 -0.001012 0.001611 -0.628 0.52973
## PC2 -0.006870 0.005005 -1.373 0.16984
## PC3 0.016745 0.007787 2.150 0.03153 *
## PC4 0.006846 0.009167 0.747 0.45514
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.414 on 82 degrees of freedom
## Residual deviance: 99.345 on 78 degrees of freedom
## AIC: 109.34
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred_sameN <- predict(m3, pt.scores)
pt.scores.roc2 <- roc(response = pt.scores$smoker[pt.scores$subject_id %in% sample],
predictor = pt.scores$pred_sameN[pt.scores$subject_id %in% sample])
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc2
##
## Call:
## roc.default(response = pt.scores$smoker[pt.scores$subject_id %in% sample], predictor = pt.scores$pred_sameN[pt.scores$subject_id %in% sample])
##
## Data: pt.scores$pred_sameN[pt.scores$subject_id %in% sample] in 29 controls (pt.scores$smoker[pt.scores$subject_id %in% sample] 0) < 54 cases (pt.scores$smoker[pt.scores$subject_id %in% sample] 1).
## Area under the curve: 0.6488
plot.roc(pt.scores.roc2)
m5 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.wi.scores[pt.wi.scores$subject_id %in% sample, ])
summary(m5)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores[pt.wi.scores$subject_id %in% sample,
## ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8653 -1.0348 0.5976 0.9036 1.6448
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.753259 0.261977 2.875 0.00404 **
## PC1 0.003940 0.001932 2.040 0.04137 *
## PC2 0.005630 0.003762 1.496 0.13455
## PC3 -0.013747 0.005807 -2.368 0.01791 *
## PC4 -0.008183 0.007178 -1.140 0.25429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.41 on 82 degrees of freedom
## Residual deviance: 92.67 on 78 degrees of freedom
## AIC: 102.67
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_sameN <- predict(m5, pt.wi.scores)
pt.wi.scores.roc2 <- roc(response = pt.wi.scores$smoker[pt.wi.scores$subject_id %in% sample],
predictor = pt.wi.scores$pred_sameN[pt.wi.scores$subject_id %in% sample])
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc2
##
## Call:
## roc.default(response = pt.wi.scores$smoker[pt.wi.scores$subject_id %in% sample], predictor = pt.wi.scores$pred_sameN[pt.wi.scores$subject_id %in% sample])
##
## Data: pt.wi.scores$pred_sameN[pt.wi.scores$subject_id %in% sample] in 29 controls (pt.wi.scores$smoker[pt.wi.scores$subject_id %in% sample] 0) < 54 cases (pt.wi.scores$smoker[pt.wi.scores$subject_id %in% sample] 1).
## Area under the curve: 0.7178
plot.roc(pt.wi.scores.roc2)
pt.scores$male <- ifelse(pt.scores$demo_gender == "Male", 1, 0)
pt.scores$BMI <- pt.scores$demo_weight*703/(pt.scores$demo_height)^2
pt.wi.scores$male <- ifelse(pt.wi.scores$demo_gender == "Male", 1, 0)
pt.wi.scores$BMI <- pt.wi.scores$demo_weight*703/(pt.wi.scores$demo_height)^2
var.int <- c("PC1", "PC2", "PC3", "PC4",
"demo_age", "demo_weight", "demo_height",
"male", "thc",
"postConsumpTimeToTest", "smoker")
sum(names(pt.scores) %in% var.int)
## [1] 11
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"demo_age", "demo_weight", "demo_height",
"male", "thc",
"postConsumpTimeToTest", "smoker")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
post.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7433 -4.5095 -0.1779 3.3156 12.4424
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.328581 0.544501 59.373 <2e-16 ***
## PC1 0.003638 0.003362 1.082 0.2823
## PC2 0.017841 0.010074 1.771 0.0802 .
## PC3 -0.014238 0.016978 -0.839 0.4041
## PC4 0.004196 0.019829 0.212 0.8329
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.107 on 83 degrees of freedom
## Multiple R-squared: 0.05797, Adjusted R-squared: 0.01257
## F-statistic: 1.277 on 4 and 83 DF, p-value: 0.2857
post.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -81.007 -31.174 -2.778 21.534 100.079
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.78869 4.17375 41.159 <2e-16 ***
## PC1 0.02279 0.02577 0.885 0.379
## PC2 0.07283 0.07722 0.943 0.348
## PC3 -0.10133 0.13014 -0.779 0.438
## PC4 -0.03284 0.15199 -0.216 0.829
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.15 on 83 degrees of freedom
## Multiple R-squared: 0.02715, Adjusted R-squared: -0.01974
## F-statistic: 0.579 on 4 and 83 DF, p-value: 0.6787
post.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4623 -3.1665 -0.1085 2.9379 9.5038
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.623035 0.440086 155.931 <2e-16 ***
## PC1 0.003362 0.002717 1.237 0.219
## PC2 -0.010681 0.008142 -1.312 0.193
## PC3 -0.002450 0.013722 -0.179 0.859
## PC4 -0.012663 0.016026 -0.790 0.432
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.128 on 83 degrees of freedom
## Multiple R-squared: 0.04493, Adjusted R-squared: -0.001095
## F-statistic: 0.9762 on 4 and 83 DF, p-value: 0.4251
post.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.0155 -3.3491 -0.5236 2.8427 9.2691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.479889 0.469509 54.269 <2e-16 ***
## PC1 0.001283 0.002899 0.443 0.6591
## PC2 0.018735 0.008687 2.157 0.0339 *
## PC3 -0.008918 0.014640 -0.609 0.5441
## PC4 0.008979 0.017098 0.525 0.6009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.404 on 83 degrees of freedom
## Multiple R-squared: 0.06298, Adjusted R-squared: 0.01782
## F-statistic: 1.395 on 4 and 83 DF, p-value: 0.243
post.thc.m <- lm(thc ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.thc.m)
##
## Call:
## lm(formula = thc ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.712 -17.632 -10.603 8.416 124.710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.665047 3.899715 5.043 4.81e-06 ***
## PC1 -0.024583 0.028273 -0.869 0.388
## PC2 0.001937 0.083178 0.023 0.981
## PC3 0.123096 0.150876 0.816 0.418
## PC4 0.047819 0.164618 0.290 0.772
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.37 on 58 degrees of freedom
## (25 observations deleted due to missingness)
## Multiple R-squared: 0.02384, Adjusted R-squared: -0.04348
## F-statistic: 0.3541 on 4 and 58 DF, p-value: 0.8401
post.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.956 -3.260 -0.358 3.157 19.011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.585200 0.782396 79.992 <2e-16 ***
## PC1 0.005490 0.005444 1.008 0.318
## PC2 0.020016 0.016031 1.249 0.217
## PC3 -0.035414 0.030880 -1.147 0.257
## PC4 -0.021805 0.032898 -0.663 0.510
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.688 on 53 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.07212, Adjusted R-squared: 0.002092
## F-statistic: 1.03 on 4 and 53 DF, p-value: 0.4005
post.smoker.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.scores)
summary(post.smoker.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8350 -1.2045 0.7295 1.1009 1.3341
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.310861 0.225081 1.381 0.1672
## PC1 0.002979 0.001540 1.934 0.0531 .
## PC2 -0.004577 0.004861 -0.942 0.3463
## PC3 -0.002204 0.007573 -0.291 0.7710
## PC4 0.006793 0.009033 0.752 0.4521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 120.35 on 87 degrees of freedom
## Residual deviance: 114.67 on 83 degrees of freedom
## AIC: 124.67
##
## Number of Fisher Scoring iterations: 4
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4",
"demo_age", "demo_weight", "demo_height",
"male", "thc",
"postConsumpTimeToTest", "smoker")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
wi.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5547 -3.2751 -0.7433 2.7156 13.7134
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.882898 0.543129 58.702 <2e-16 ***
## PC1 0.001020 0.003683 0.277 0.783
## PC2 -0.010495 0.007292 -1.439 0.154
## PC3 0.004444 0.010111 0.440 0.661
## PC4 -0.017603 0.013310 -1.323 0.190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.946 on 78 degrees of freedom
## Multiple R-squared: 0.04981, Adjusted R-squared: 0.001079
## F-statistic: 1.022 on 4 and 78 DF, p-value: 0.4012
wi.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -88.967 -25.318 -3.861 19.517 100.072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.17090 4.14600 41.286 <2e-16 ***
## PC1 0.05410 0.02812 1.924 0.058 .
## PC2 -0.07034 0.05566 -1.264 0.210
## PC3 0.07956 0.07718 1.031 0.306
## PC4 -0.15035 0.10160 -1.480 0.143
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.76 on 78 degrees of freedom
## Multiple R-squared: 0.09853, Adjusted R-squared: 0.0523
## F-statistic: 2.131 on 4 and 78 DF, p-value: 0.08475
wi.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0024 -3.2443 0.1415 2.2602 8.9463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.655644 0.459280 149.485 <2e-16 ***
## PC1 0.002068 0.003115 0.664 0.509
## PC2 -0.006438 0.006166 -1.044 0.300
## PC3 0.004222 0.008550 0.494 0.623
## PC4 -0.016340 0.011255 -1.452 0.151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.182 on 78 degrees of freedom
## Multiple R-squared: 0.04734, Adjusted R-squared: -0.001518
## F-statistic: 0.9689 on 4 and 78 DF, p-value: 0.4294
wi.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.716 -2.721 -1.009 2.568 8.732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.362698 0.478822 52.969 <2e-16 ***
## PC1 0.006205 0.003247 1.911 0.0597 .
## PC2 -0.005958 0.006429 -0.927 0.3569
## PC3 0.008742 0.008914 0.981 0.3298
## PC4 -0.010993 0.011734 -0.937 0.3517
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.36 on 78 degrees of freedom
## Multiple R-squared: 0.07508, Adjusted R-squared: 0.02765
## F-statistic: 1.583 on 4 and 78 DF, p-value: 0.1872
wi.thc.m <- lm(thc ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.thc.m)
##
## Call:
## lm(formula = thc ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.803 -15.427 -11.194 0.051 124.098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.90239 3.98476 4.744 1.58e-05 ***
## PC1 0.02753 0.03003 0.917 0.363
## PC2 -0.02135 0.05862 -0.364 0.717
## PC3 -0.02534 0.07666 -0.331 0.742
## PC4 0.03575 0.09238 0.387 0.700
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.6 on 54 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.02253, Adjusted R-squared: -0.04987
## F-statistic: 0.3112 on 4 and 54 DF, p-value: 0.8693
wi.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6004 -3.2229 -0.2428 2.2992 20.5976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.725821 0.802483 76.919 <2e-16 ***
## PC1 0.003313 0.005764 0.575 0.5681
## PC2 0.013080 0.011889 1.100 0.2766
## PC3 -0.026501 0.015606 -1.698 0.0958 .
## PC4 0.004978 0.017923 0.278 0.7824
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.601 on 49 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.08099, Adjusted R-squared: 0.005971
## F-statistic: 1.08 on 4 and 49 DF, p-value: 0.3768
wi.smoker.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.wi.scores)
summary(wi.smoker.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6076 -1.1858 0.6695 1.0224 1.6609
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3578612 0.2347489 1.524 0.1274
## PC1 0.0023717 0.0016444 1.442 0.1492
## PC2 -0.0070727 0.0036153 -1.956 0.0504 .
## PC3 0.0055780 0.0049014 1.138 0.2551
## PC4 -0.0004479 0.0060586 -0.074 0.9411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113.02 on 82 degrees of freedom
## Residual deviance: 105.80 on 78 degrees of freedom
## AIC: 115.8
##
## Number of Fisher Scoring iterations: 4
2.1 Comments for Ben